將資料下載下來,儲存到自己的工作空間。請點此。因Mac與Windows預設編碼不同,請下載屬於自己的檔案
打開R studio,建立一個新專案,指定路徑到你的工作空間
data_EPA <- read.csv("環保署列管污染農地.csv")
data_TARI <- read.csv("農地重金屬超標未列管.csv")
# install.packages("dplyr")
library(dplyr)
#因為沒有全部要用,擷取需要用到的資訊,並另外命名為英文
data_EPA <- data_EPA %>% select(county=1, coordinate=3, area=7, control_date=8, free_date=10)
summarise_data <- data_EPA %>% group_by(county) %>%
summarise(count = n(),
sum_area = sum(area),
control_area = sum(area[free_date=="無"]),
free_area = sum(area[free_date!="無"]),
count_control = sum(free_date=="無"),
count_free = sum(free_date!="無")) %>%
mutate(avg_area = sum_area/count,
ratio_control = sprintf("%1.0f%%",count_control/count*100),
ratio_free = sprintf("%1.0f%%",count_free/count*100))
#把列管時間轉成日期格式
data_EPA$control_date <- data_EPA$control_date %>% as.Date()
data_EPA$free_date <- data_EPA$free_date %>% as.Date()
#把尚未解除列管的時間帶資料收集截止時間"2016-12-18",以便計算列管時間
data_EPA$free_date <- replace(data_EPA$free_date,which(is.na(data_EPA$free_date)),as.Date("2016-12-18"))
#計算列管月份
data_EPA$totol_month <- NA
for(i in 1:nrow(data_EPA)){
tryCatch({ #因為資料裡有其中一筆沒有列管時間,我們加tryCatch讓他忽略錯誤,讓迴圈可以正常執行完
data_EPA$totol_month[i] <-
length(seq(from=data_EPA$control_date[i], to=data_EPA$free_date[i], by='month'))
}, error=function(e){})
}
#計算各縣市列管中與解除列管的平均列管月份
avg_month <- data_EPA %>% group_by(county) %>%
summarise(control_month = mean(totol_month[free_date=="2016-12-18"]) %>% round(),
free_month = mean(totol_month[free_date!="2016-12-18"],na.rm = TRUE)%>%
round()) #因為資料中有一筆沒有列管時間,因此把參數na.rm改為TRUE,才可以運算
#把無列管中的平均月份帶0
avg_month[is.na(avg_month)] <- 0
#合併回summarise_data
summarise_data <- summarise_data %>% left_join(avg_month, by="county")
tmp <- colSums(summarise_data[,2:8]) %>% t() %>% as.data.frame() %>%
mutate(ratio_control = sprintf("%1.0f%%",count_control/count*100),
ratio_free = sprintf("%1.0f%%",count_free/count*100)) %>%
cbind(.,colMeans(summarise_data[,11:12])%>% round() %>% t() %>% as.data.frame()) %>%
mutate(county="全國") %>%
select(12,1:11)
summarise_data <- rbind(tmp, summarise_data)
| county | count | sum_area | control_area | free_area | count_control | count_free | avg_area | ratio_control | ratio_free | control_month | free_month |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 全國 | 5485 | 9263582.55 | 3576260.23 | 5687322.33 | 2468 | 3017 | 51145.5310 | 45% | 55% | 40 | 35 |
| 宜蘭縣 | 5 | 11798.15 | 3156.64 | 8641.51 | 1 | 4 | 2359.6300 | 20% | 80% | 98 | 22 |
| 南投縣 | 11 | 4046.00 | 2747.00 | 1299.00 | 4 | 7 | 367.8182 | 36% | 64% | 167 | 103 |
| 屏東縣 | 3 | 75150.81 | 1235.16 | 73915.65 | 1 | 2 | 25050.2700 | 33% | 67% | 6 | 22 |
| 苗栗縣 | 37 | 43766.63 | 5729.23 | 38037.40 | 5 | 32 | 1182.8819 | 14% | 86% | 12 | 27 |
| 桃園市 | 1890 | 2511211.33 | 1216032.30 | 1295179.03 | 1060 | 830 | 1328.6832 | 56% | 44% | 48 | 42 |
| 高雄市 | 49 | 84948.44 | 0.00 | 84948.44 | 0 | 49 | 1733.6416 | 0% | 100% | 0 | 32 |
| 雲林縣 | 25 | 58230.54 | 4329.00 | 53901.54 | 2 | 23 | 2329.2216 | 8% | 92% | 78 | 31 |
| 新北市 | 13 | 37235.00 | 0.00 | 37235.00 | 0 | 13 | 2864.2308 | 0% | 100% | 0 | 22 |
| 新竹市 | 202 | 362010.73 | 2746.50 | 359264.23 | 2 | 200 | 1792.1323 | 1% | 99% | 7 | 27 |
| 嘉義市 | 19 | 46035.64 | 12938.00 | 33097.64 | 5 | 14 | 2422.9284 | 26% | 74% | 28 | 49 |
| 嘉義縣 | 2 | 4740.96 | 0.00 | 4740.96 | 0 | 2 | 2370.4800 | 0% | 100% | 0 | 16 |
| 彰化縣 | 2503 | 5038536.34 | 2282395.32 | 2756141.02 | 1340 | 1163 | 2012.9989 | 54% | 46% | 25 | 28 |
| 臺中市 | 600 | 742225.90 | 10292.02 | 731933.88 | 11 | 589 | 1237.0432 | 2% | 98% | 32 | 30 |
| 臺北市 | 22 | 48852.15 | 0.00 | 48852.15 | 0 | 22 | 2220.5523 | 0% | 100% | 0 | 37 |
| 臺南市 | 104 | 194793.93 | 34659.06 | 160134.87 | 37 | 67 | 1873.0186 | 36% | 64% | 105 | 30 |
# install.packages("plotly")
library(plotly)
plot_ly( summarise_data, x = ~county, y = ~count_control, type = "bar") %>%
layout(title = "所有案件",
xaxis = list(title = 'county'),
yaxis = list(title = 'count'),
width = 750, height = 400)
plot_ly(summarise_data, x = ~county, y = ~count_control, type = 'bar',
name = '列管案件', text = ~ratio_control) %>%
add_trace(y = ~count_free, name = '解除列管案件', text = ~ratio_free) %>%
layout(xaxis = list(title = "", tickangle = -45),
yaxis = list(title = 'count'), barmode = 'group',
width = 800, height = 400)
#列管縣市太多,很多面積很小,取場址面積大於中位數的來畫圖
summarise_data1 <- summarise_data %>% arrange(sum_area %>% desc) %>% filter(sum_area > median(sum_area))
plot_ly(summarise_data1, labels = ~county, values = ~sum_area, type = 'pie',
textinfo = 'label+percent') %>% layout(title = '場址面積總和', width = 800, height = 500,
margin=list(l = 100, r = 50, b = 100,t = 100,pad = 4))
將經緯度資訊從TWD07轉為WGS84
先下載轉碼程式包並執行
把經緯度資訊抓出來,分割成x軸與y軸
head(data_EPA,3) #可以看到座標欄位裡面有:和,
## county coordinate area control_date free_date totol_month
## 1 臺北市 X:301277,Y:2777431 387 2002-10-04 2004-11-02 25
## 2 臺北市 X:301267,Y:2777372 7738 2002-10-04 2004-11-02 25
## 3 臺北市 X:301310,Y:2777998 580 2004-01-27 2005-03-01 14
TWD97 <- data_EPA$coordinate %>% as.character() %>%
stringr::str_split(.,'[,:]',simplify = TRUE) %>%
as.data.frame(., stringsAsFactors=FALSE) %>% select(x=2,y=4)
WGS84 <- TWD97TM2toWGS84(TWD97$x, TWD97$y) %>% as.data.frame()
#合併回原本的data
data_EPA$coordinate <- NULL
data_EPA <- cbind(data_EPA,TWD97,WGS84)
head(data_EPA)
## county area control_date free_date totol_month x y
## 1 臺北市 387.00 2002-10-04 2004-11-02 25 301277 2777431
## 2 臺北市 7738.00 2002-10-04 2004-11-02 25 301267 2777372
## 3 臺北市 580.00 2004-01-27 2005-03-01 14 301310 2777998
## 4 臺北市 3205.65 2004-12-10 2008-04-18 41 299512 2779782
## 5 臺北市 2855.14 2004-12-10 2008-04-18 41 299518 2779865
## 6 臺北市 3513.00 2004-12-10 2008-04-18 41 299669 2779961
## lat lon
## 1 25.10434 121.5084
## 2 25.10381 121.5083
## 3 25.10946 121.5088
## 4 25.12562 121.4910
## 5 25.12637 121.4911
## 6 25.12723 121.4926
data_TARI <- data_TARI %>% mutate(CUAR= 2.035*CU + 11.884,
CUAR_OVER = ifelse(CUAR>200,1,0),
ZNAR = 2.487*ZN + 89.711,
ZNAR_OVER = ifelse(ZNAR>600,1,0),
CDAR = 1.4578 *CD + 0.0323,
CDAR_OVER = ifelse(CDAR>5,1,0),
CRAR = 17.35 * CR+ 31.91,
CRAR_OVER = ifelse(CRAR>250,1,0),
NIAR = 5.13 * NI+ 14.56,
NIAR_OVER = ifelse(NIAR>200,1,0),
PBAR = 2.811 *PB+ 6.715,
PBAR_OVER = ifelse(PBAR>500,1,0),
OVER = ifelse(CUAR_OVER+ZNAR_OVER+CDAR_OVER+CRAR_OVER+
NIAR_OVER+PBAR_OVER>=1,1,0))
data_EPA_monitor<- data_EPA%>% filter(free_date=="2016-12-18")
for(i in 1:nrow(data_TARI)){
data_TARI$mindistance[i]= min(na.exclude(sqrt((data_TARI$XLO[i]-as.integer(data_EPA_monitor$x))^2+(data_TARI$YLO[i]-as.integer(data_EPA_monitor$y))^2)))
}
for(i in 1:nrow(data_TARI)){
data_TARI$minover[i] <- sum(na.exclude(sqrt((data_TARI$XLO[i]-data_TARI$XLO)^2+(data_TARI$YLO[i]-data_TARI$YLO)^2))<=1000)
}
data_TARI <- data_TARI %>% mutate(Is_monitored=ifelse(mindistance<=200,1,0))
library(scales)
stat_over <- data_TARI %>% group_by(V2) %>%
summarise(`重金屬超標`=sum(OVER),
`超標未列管`=sum(ifelse(OVER+Is_monitored==1,1,0))) %>%
mutate(`超標未列管比例`=percent(`超標未列管`/`重金屬超標`),
`超標列管比例`=percent(1-`超標未列管`/`重金屬超標`))
# install.packages("leaflet")
library(leaflet)
#先把要點上圖的資料篩選出來
EPA <- data_EPA %>% filter(free_date=="2016-12-18") %>% select(lat,lon,area)
TARI <- data_TARI %>% select(lat = Y,lon = X, area = AREA)
leaflet() %>% setView(lng=120.58,lat=23.58,zoom=8) %>%
addProviderTiles("Esri.WorldImagery") %>%
addCircles(data = EPA, color = "red",lng = ~lon, lat = ~lat, weight = 1, radius = ~sqrt(area/pi)) %>%
addCircles(data = TARI, color = "yellow",lng = ~lon, lat = ~lat, weight = 1, radius = ~sqrt(area/pi))
TARI <- data_TARI %>% select(lat = Y,lon = X, area = AREA ,mindistance) %>% mutate(log_mindistance=log(mindistance))
leaflet() %>%
addCircles(data=TARI,lng = ~lon, lat = ~lat, weight = 2,
color=~colorNumeric(c("#CD0000", "#FFFFFF","#0B752F"),
log_mindistance)(log_mindistance)) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addLegend(position = 'topleft',
pal =colorNumeric(c("#CD0000","#FFFFFF","#0B752F"),
domain=TARI$log_mindistance),
values=TARI$log_mindistance,
title = 'log-mindistance')